In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
In [2]:
df=pd.read_csv('1976-2020-president.csv')
df
Out[2]:
year state state_po state_fips state_cen state_ic office candidate party_detailed writein candidatevotes totalvotes version notes party_simplified
0 1976 ALABAMA AL 1 63 41 US PRESIDENT CARTER, JIMMY DEMOCRAT False 659170 1182850 20210113 NaN DEMOCRAT
1 1976 ALABAMA AL 1 63 41 US PRESIDENT FORD, GERALD REPUBLICAN False 504070 1182850 20210113 NaN REPUBLICAN
2 1976 ALABAMA AL 1 63 41 US PRESIDENT MADDOX, LESTER AMERICAN INDEPENDENT PARTY False 9198 1182850 20210113 NaN OTHER
3 1976 ALABAMA AL 1 63 41 US PRESIDENT BUBAR, BENJAMIN ""BEN"" PROHIBITION False 6669 1182850 20210113 NaN OTHER
4 1976 ALABAMA AL 1 63 41 US PRESIDENT HALL, GUS COMMUNIST PARTY USE False 1954 1182850 20210113 NaN OTHER
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4282 2020 WYOMING WY 56 83 68 US PRESIDENT JORGENSEN, JO LIBERTARIAN False 5768 278503 20210113 NaN LIBERTARIAN
4283 2020 WYOMING WY 56 83 68 US PRESIDENT PIERCE, BROCK INDEPENDENT False 2208 278503 20210113 NaN OTHER
4284 2020 WYOMING WY 56 83 68 US PRESIDENT NaN NaN True 1739 278503 20210113 NaN OTHER
4285 2020 WYOMING WY 56 83 68 US PRESIDENT OVERVOTES NaN False 279 278503 20210113 NaN OTHER
4286 2020 WYOMING WY 56 83 68 US PRESIDENT UNDERVOTES NaN False 1459 278503 20210113 NaN OTHER

4287 rows × 15 columns

In [3]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4287 entries, 0 to 4286
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   year              4287 non-null   int64  
 1   state             4287 non-null   object 
 2   state_po          4287 non-null   object 
 3   state_fips        4287 non-null   int64  
 4   state_cen         4287 non-null   int64  
 5   state_ic          4287 non-null   int64  
 6   office            4287 non-null   object 
 7   candidate         4000 non-null   object 
 8   party_detailed    3831 non-null   object 
 9   writein           4284 non-null   object 
 10  candidatevotes    4287 non-null   int64  
 11  totalvotes        4287 non-null   int64  
 12  version           4287 non-null   int64  
 13  notes             0 non-null      float64
 14  party_simplified  4287 non-null   object 
dtypes: float64(1), int64(7), object(7)
memory usage: 502.5+ KB
In [4]:
df.describe()
Out[4]:
year state_fips state_cen state_ic candidatevotes totalvotes version notes
count 4287.000000 4287.000000 4287.000000 4287.000000 4.287000e+03 4.287000e+03 4287.0 0.0
mean 1999.080942 28.616982 53.665034 39.754840 3.119076e+05 2.366924e+06 20210113.0 NaN
std 14.220014 15.616459 26.029189 22.772216 7.648011e+05 2.465008e+06 0.0 NaN
min 1976.000000 1.000000 11.000000 1.000000 0.000000e+00 1.235740e+05 20210113.0 NaN
25% 1988.000000 16.000000 33.000000 22.000000 1.177000e+03 6.522740e+05 20210113.0 NaN
50% 2000.000000 28.000000 53.000000 42.000000 7.499000e+03 1.569180e+06 20210113.0 NaN
75% 2012.000000 41.000000 81.000000 61.000000 1.992415e+05 3.033118e+06 20210113.0 NaN
max 2020.000000 56.000000 95.000000 82.000000 1.111025e+07 1.750088e+07 20210113.0 NaN
In [5]:
df.columns
Out[5]:
Index(['year', 'state', 'state_po', 'state_fips', 'state_cen', 'state_ic',
       'office', 'candidate', 'party_detailed', 'writein', 'candidatevotes',
       'totalvotes', 'version', 'notes', 'party_simplified'],
      dtype='object')
In [6]:
#Fill all the null values with NO_Data 
df.fillna('NO_DATA',inplace=True)
df
Out[6]:
year state state_po state_fips state_cen state_ic office candidate party_detailed writein candidatevotes totalvotes version notes party_simplified
0 1976 ALABAMA AL 1 63 41 US PRESIDENT CARTER, JIMMY DEMOCRAT False 659170 1182850 20210113 NO_DATA DEMOCRAT
1 1976 ALABAMA AL 1 63 41 US PRESIDENT FORD, GERALD REPUBLICAN False 504070 1182850 20210113 NO_DATA REPUBLICAN
2 1976 ALABAMA AL 1 63 41 US PRESIDENT MADDOX, LESTER AMERICAN INDEPENDENT PARTY False 9198 1182850 20210113 NO_DATA OTHER
3 1976 ALABAMA AL 1 63 41 US PRESIDENT BUBAR, BENJAMIN ""BEN"" PROHIBITION False 6669 1182850 20210113 NO_DATA OTHER
4 1976 ALABAMA AL 1 63 41 US PRESIDENT HALL, GUS COMMUNIST PARTY USE False 1954 1182850 20210113 NO_DATA OTHER
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4282 2020 WYOMING WY 56 83 68 US PRESIDENT JORGENSEN, JO LIBERTARIAN False 5768 278503 20210113 NO_DATA LIBERTARIAN
4283 2020 WYOMING WY 56 83 68 US PRESIDENT PIERCE, BROCK INDEPENDENT False 2208 278503 20210113 NO_DATA OTHER
4284 2020 WYOMING WY 56 83 68 US PRESIDENT NO_DATA NO_DATA True 1739 278503 20210113 NO_DATA OTHER
4285 2020 WYOMING WY 56 83 68 US PRESIDENT OVERVOTES NO_DATA False 279 278503 20210113 NO_DATA OTHER
4286 2020 WYOMING WY 56 83 68 US PRESIDENT UNDERVOTES NO_DATA False 1459 278503 20210113 NO_DATA OTHER

4287 rows × 15 columns

In [7]:
state=df['state'].unique()
p=px.histogram(df,x='state',y='totalvotes',title='State Wise Total Votes')
p.show()
In [8]:
plt.hist(df['totalvotes'])
plt.title('total_votes in election')
plt.show()
In [9]:
plt.hist(df['candidatevotes'])  
plt.title('candidate_votes in election')
plt.show()
In [10]:
data=df.groupby(['year','party_simplified'])['party_simplified'].count().unstack()
plt.figure(figsize=(10,6))
data.plot(kind='bar',stacked=True)
Out[10]:
<AxesSubplot:xlabel='year'>
<Figure size 720x432 with 0 Axes>
In [11]:
#Voters Turn out 
voters_number=df.groupby('year')['totalvotes'].sum()
x=voters_number.index
y=voters_number.values
plt.figure(figsize=(10,6))
plt.plot(x,y)
plt.xlabel('Year')
plt.ylabel('Total number of votes casted in Year')
plt.title('Voter Turn Out ')
plt.show()
In [12]:
parties=list(df['party_simplified'].unique())
parties
Out[12]:
['DEMOCRAT', 'REPUBLICAN', 'OTHER', 'LIBERTARIAN']
In [13]:
df_year_party=pd.DataFrame(columns=["year"] + parties)
df_year_party
Out[13]:
year DEMOCRAT REPUBLICAN OTHER LIBERTARIAN
In [14]:
for year in df.year.unique():
    row = {"year": year}
    for party in parties:
        row[party] = df[(df.year == year) & (df.party_simplified == party)].candidatevotes.sum()
    df_year_party = df_year_party.append(row, ignore_index=True)
/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

In [15]:
df_year_party["votes_casted"] = df_year_party[parties].sum(axis=1)

df_year_party = df_year_party.astype(np.uint32)
df_year_party.year = df_year_party.year.astype(np.uint16)

df_year_party.set_index('year', inplace=True)
In [16]:
df_year_party
Out[16]:
DEMOCRAT REPUBLICAN OTHER LIBERTARIAN votes_casted
year
1976 40680446 38870893 1954379 95626 81601344
1980 35480948 43642639 6505863 867401 86496851
1984 37449813 54166829 811015 227204 92654861
1988 41716679 48642640 817798 409708 91586825
1992 44856747 38798913 20663272 280848 104599780
1996 47295351 39003697 9625419 465351 96389818
2000 50830580 50311372 4071625 380405 105593982
2004 58894561 61872711 1212870 369308 122349450
2008 69338846 59613835 1956116 510456 131419253
2012 65752017 60670117 1501463 1216400 129139997
2016 65677288 62692670 4292059 4125170 136787187
2020 81268908 74216146 1246094 1797355 158528503
In [17]:
#Total number of votes
df['totalvotes'].sum()
Out[17]:
10147003830
In [18]:
#Function to extract the data of specific party 

def extract_party_data(party_name):
    t=df['party_simplified']==party_name
    df_new=df[t]
    return df_new

DEMOCRATIC PARTY ANALYSIS

In [19]:
dm=extract_party_data('DEMOCRAT')
dm
Out[19]:
year state state_po state_fips state_cen state_ic office candidate party_detailed writein candidatevotes totalvotes version notes party_simplified
0 1976 ALABAMA AL 1 63 41 US PRESIDENT CARTER, JIMMY DEMOCRAT False 659170 1182850 20210113 NO_DATA DEMOCRAT
8 1976 ALASKA AK 2 94 81 US PRESIDENT CARTER, JIMMY DEMOCRAT False 44058 123574 20210113 NO_DATA DEMOCRAT
12 1976 ARIZONA AZ 4 86 61 US PRESIDENT CARTER, JIMMY DEMOCRAT False 295602 742719 20210113 NO_DATA DEMOCRAT
19 1976 ARKANSAS AR 5 71 42 US PRESIDENT CARTER, JIMMY DEMOCRAT False 498604 767535 20210113 NO_DATA DEMOCRAT
24 1976 CALIFORNIA CA 6 93 71 US PRESIDENT CARTER, JIMMY DEMOCRAT False 3742284 7803770 20210113 NO_DATA DEMOCRAT
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4252 2020 VIRGINIA VA 51 54 40 US PRESIDENT BIDEN, JOSEPH R. JR DEMOCRAT False 2413568 4460524 20210113 NO_DATA DEMOCRAT
4256 2020 WASHINGTON WA 53 91 73 US PRESIDENT BIDEN, JOSEPH R. JR DEMOCRAT False 2369612 4087631 20210113 NO_DATA DEMOCRAT
4263 2020 WEST VIRGINIA WV 54 55 56 US PRESIDENT BIDEN, JOSEPH R. JR DEMOCRAT False 235984 794652 20210113 NO_DATA DEMOCRAT
4267 2020 WISCONSIN WI 55 35 25 US PRESIDENT BIDEN, JOSEPH R. JR DEMOCRAT False 1630866 3298041 20210113 NO_DATA DEMOCRAT
4280 2020 WYOMING WY 56 83 68 US PRESIDENT BIDEN, JOSEPH R. JR DEMOCRAT False 73491 278503 20210113 NO_DATA DEMOCRAT

615 rows × 15 columns

In [20]:
#Total Votes of democratic party 
democratic_votes=dm['totalvotes'].sum()
democratic_votes
Out[20]:
1344886700
In [21]:
#Candidates who took part in presidential election in democratic party
p=dm['candidate'].unique()
p
Out[21]:
array(['CARTER, JIMMY', 'MONDALE, WALTER', 'DUKAKIS, MICHAEL',
       'CLINTON, BILL', 'GORE, AL', 'KERRY, JOHN', 'OTHER',
       'OBAMA, BARACK H.', 'CLINTON, HILLARY', 'NO_DATA',
       'BIDEN, JOSEPH R. JR'], dtype=object)
In [22]:
#Calculate the total number of votes obtained for each candidate and get the top 5 
x=dm.groupby('candidate')
plt.figure(figsize=(10,6))
plt.xlabel('Candidate from party')
plt.ylabel('Total number of votes obtained')
x['totalvotes'].sum().sort_values(ascending=False).head(5).plot(kind='bar',title='Total number of votes obtained for each candidate ')
Out[22]:
<AxesSubplot:title={'center':'Total number of votes obtained for each candidate '}, xlabel='candidate', ylabel='Total number of votes obtained'>
In [23]:
#Calculate the total number of votes obtained as per state and get the top 5
x1=dm.groupby('state')
plt.figure(figsize=(10,6))
plt.xlabel('State')
plt.ylabel('Total number of votes obtained')
x1['totalvotes'].sum().sort_values(ascending=False).head(5).plot(kind='bar',title='Total number of votes obtained from a state ')
Out[23]:
<AxesSubplot:title={'center':'Total number of votes obtained from a state '}, xlabel='state', ylabel='Total number of votes obtained'>
In [24]:
x2=dm.groupby('year')
plt.figure(figsize=(10,6))
plt.xlabel('Year')
plt.ylabel('Total number of votes obtained')
x2['totalvotes'].sum().sort_values(ascending=False).plot(kind='bar',title='Total number of votes obtained from a state ')
Out[24]:
<AxesSubplot:title={'center':'Total number of votes obtained from a state '}, xlabel='year', ylabel='Total number of votes obtained'>
In [25]:
#Extract data as per state 

def extract_party_data(state_name):
    t1=df['state']==state_name
    df_new1=df[t1]
    return df_new1
In [26]:
m=extract_party_data('CALIFORNIA')
m
Out[26]:
year state state_po state_fips state_cen state_ic office candidate party_detailed writein candidatevotes totalvotes version notes party_simplified
23 1976 CALIFORNIA CA 6 93 71 US PRESIDENT FORD, GERALD REPUBLICAN False 3882244 7803770 20210113 NO_DATA REPUBLICAN
24 1976 CALIFORNIA CA 6 93 71 US PRESIDENT CARTER, JIMMY DEMOCRAT False 3742284 7803770 20210113 NO_DATA DEMOCRAT
25 1976 CALIFORNIA CA 6 93 71 US PRESIDENT MACBRIDE, ROGER INDEPENDENT False 56388 7803770 20210113 NO_DATA OTHER
26 1976 CALIFORNIA CA 6 93 71 US PRESIDENT MADDOX, LESTER AMERICAN INDEPENDENT PARTY False 51098 7803770 20210113 NO_DATA OTHER
27 1976 CALIFORNIA CA 6 93 71 US PRESIDENT WRIGHT, MARGARET PEACE & FREEDOM False 41731 7803770 20210113 NO_DATA OTHER
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3779 2020 CALIFORNIA CA 6 93 71 US PRESIDENT CARROLL, BRIAN AMERICAN SOLIDARITY PARTY True 2605 17500881 20210113 NO_DATA OTHER
3780 2020 CALIFORNIA CA 6 93 71 US PRESIDENT JANOS, JAMES G. "JESSE VENTURA" INDEPENDENT True 611 17500881 20210113 NO_DATA OTHER
3781 2020 CALIFORNIA CA 6 93 71 US PRESIDENT MARK CHARLES INDEPENDENT True 559 17500881 20210113 NO_DATA OTHER
3782 2020 CALIFORNIA CA 6 93 71 US PRESIDENT PIERCE, BROCK INDEPENDENT True 185 17500881 20210113 NO_DATA OTHER
3783 2020 CALIFORNIA CA 6 93 71 US PRESIDENT JOSEPH KISHORE INDEPENDENT True 121 17500881 20210113 NO_DATA OTHER

85 rows × 15 columns

In [27]:
#Which party won more number of times in California 
m1=m.groupby('party_simplified')
plt.figure(figsize=(10,6))
plt.xlabel('Party')
plt.ylabel('Total number of votes obtained')
m1['totalvotes'].sum().sort_values(ascending=False).head(5).plot(kind='bar',title='Total number of votes obtained for each candidate ')
Out[27]:
<AxesSubplot:title={'center':'Total number of votes obtained for each candidate '}, xlabel='party_simplified', ylabel='Total number of votes obtained'>
In [28]:
df['state'].value_counts().head(10).plot(kind='bar')
Out[28]:
<AxesSubplot:>
In [29]:
state = df.groupby('state')['totalvotes'].sum().reset_index()
States = state.sort_values('totalvotes',ascending=False).head(10)
fig = px.pie(States, values=States['totalvotes'], names=States['state'])
fig.update_layout(title = 'Top 10 States with Most Votes')
fig.show()